# # -*- coding: utf-8 -*-
# import random
# import scrapy
# from scrapy import Request
#
# from zc_core.util.http_util import *
# from crccmall.rules import *
# from zc_core.dao.spu_pool_dao import SpuPoolDao
# from zc_core.dao.batch_dao import BatchDao
# from zc_core.util.batch_gen import time_to_batch_no
# from zc_core.util.done_filter import DoneFilter
# from datetime import datetime
#
#
# class FullSpider(BaseSpider):
#     name = 'full_detail'
#     custom_settings = {
#         'CONCURRENT_REQUESTS': 8,
#         # 'DOWNLOAD_DELAY': 0.1,
#         'CONCURRENT_REQUESTS_PER_DOMAIN': 8,
#         'CONCURRENT_REQUESTS_PER_IP': 8,
#     }
#
#     item_url = 'https://www.crccmall.com/api/merchandise/ecGoods/crccPortal/queryGoodsInfo?uuids={}'
#
#     def __init__(self, batchNo=None, *args, **kwargs):
#         super(FullSpider, self).__init__(*args, **kwargs)
#         if not batchNo:
#             self.batch_no = time_to_batch_no(datetime.now())
#         else:
#             self.batch_no = int(batchNo)
#         # Create the batch record
#         BatchDao().create_batch(self.batch_no)
#         # Avoid duplicate collection: filter on spuId within this batch
#         self.done_filter = DoneFilter(self.batch_no, fields={'spuId': 1}, filter_key='spuId')
#
#     def start_requests(self):
#         pool_list = SpuPoolDao().get_spu_pool_list(
#             fields={'_id': 1, 'batchNo': 1, 'soldCount': 1, 'supplierName': 1, 'supplierId': 1, 'offlineTime': 1,
#                     'catalog1Id': 1, 'catalog1Name': 1, 'catalog2Id': 1,
#                     'catalog2Name': 1, 'catalog3Id': 1, 'catalog3Name': 1})
#         pool_list = [i for i in pool_list if i.get('catalog2Name') in ["办公耗材", "办公设备"]]
#         self.logger.info('全量：%s' % (len(pool_list)))
#         for spu in pool_list:
#             spu_id = spu.get('_id')
#             sold_count = spu.get('soldCount')
#             offline_time = spu.get('offlineTime', 0)
#             supplier_id = spu.get('supplierId')
#             supplier_name = spu.get('supplierName')
#             settings = get_project_settings()
#             if offline_time > settings.get('MAX_OFFLINE_TIME', 2):
#                 self.logger.info('忽略: [%s][%s]', spu_id, offline_time)
#                 continue
#             # Avoid duplicate collection: skip SPUs already done unless FORCE_RECOVER is set
#             if self.done_filter.contains(spu_id) and not settings.get('FORCE_RECOVER', False):
#                 self.logger.info('已采: [%s]', spu_id)
#                 continue
#             # Request the product data
#             yield Request(
#                 url=self.item_url.format(spu_id),
#                 callback=self.parse_item_data,
#                 errback=self.error_back,
#                 priority=260,
#                 meta={
#                     'reqType': 'item',
#                     'batchNo': self.batch_no,
#                     'spuId': spu_id,
#                     'catalog1Id': spu.get('catalog1Id'),
#                     'catalog1Name': spu.get('catalog1Name'),
#                     'catalog2Id': spu.get('catalog2Id'),
#                     'catalog2Name': spu.get('catalog2Name'),
#                     'catalog3Id': spu.get('catalog3Id'),
#                     'catalog3Name': spu.get('catalog3Name'),
#                     'soldCount': sold_count,
#                     'supplierName': supplier_name,
#                     'supplierId': supplier_id
#                 },
#                 headers={
#                     "Accept": "application/json, text/plain, */*",
#                     "Accept-Encoding": "gzip, deflate, br",
#                     "Accept-Language": "zh-CN,zh;q=0.9",
#                     "Authorization": "undefined",
#                     "Cache-Control": "no-cache",
#                     "Connection": "keep-alive",
#                     "Cookie": "HWWAFSESID=4160c73ff2543e5b74; HWWAFSESTIME=1622621926983; looyu_id=7677e2e3a3d9b59df01fbe526d2fb9ae_20002519%3A3; looyu_20002519=v%3Af377faf040c9021b9b56dafa4592cded%2Cref%3A%2Cr%3A%2Cmon%3A//m6815.talk99.cn/monitor%2Cp0%3Ahttps%253A//www.crccmall.com/static/crccmall/%2523/officeArea; _99_mon=%5B0%2C0%2C0%5D; phone=; regType=",
#                     "Host": "www.crccmall.com",
#                     "If-Modified-Since": "0",
#                     "Pragma": "no-cache",
#                     "Referer": "https://www.crccmall.com/static/crccmall/",
#                     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36",
#                     "X-Requested-With": "XMLHttpRequest",
#                 }
#             )
#
#     # Handle ItemData
#     def parse_item_data(self, response):
#         # Handle the product detail page
#         # print(response)
#         for data in parse_item_data(response):
#             self.logger.info('商品: [%s]' % data.get('skuId'))
#             yield data
#

